package com.feng.app.spider;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

/**
 * Command-line entry point that launches a single {@link Gets} worker thread.
 */
public class Test {

	/**
	 * Creates one {@link Gets} thread and starts it; the method returns
	 * immediately while the thread runs in the background.
	 *
	 * @param args command-line arguments; not used
	 */
	public static void main(String[] args) {
		Gets worker=new Gets();
		worker.start();
	}

}

/**
 * Fetches a web page with an HTTP GET request and prints the links found in it.
 *
 * <p>When run, the thread requests {@code http://www.baidu.com}, prints the
 * entity's content type, content encoding and content length, prints the part
 * of the Content-Type header that follows the last {@code /}, and then scans
 * the body line by line, printing the value of every {@code href="..."}
 * attribute that contains {@code http}.
 *
 * @author feng
 */
class Gets extends Thread{
	/** Attribute prefix that marks the start of a link target. */
	private static final String HREF_PREFIX="href=\"";

	HttpClient client=HttpClients.createDefault();
	
	/**
	 * Executes the GET request and prints the response metadata and links.
	 * An {@link IOException} raised while requesting or reading the page is
	 * reported via its stack trace and ends the run.
	 */
	@Override
	public void run() {
		// GET request for the page to scan
		HttpGet get=new HttpGet("http://www.baidu.com");
		
		try {
			HttpResponse response=client.execute(get);
			HttpEntity entity=response.getEntity();
			if(entity==null){
				// A response is allowed to carry no body; nothing to scan then.
				System.err.println("no response entity for "+get.getURI());
				return;
			}
			
			System.out.println("content type=   "+entity.getContentType());
			System.out.println("encoding=    "+entity.getContentEncoding());
			System.out.println("length=    "+entity.getContentLength());
			
			printSubtype(entity.getContentType());
			
			System.out.println("done");
			
			// try-with-resources closes the reader, and with it the entity's
			// content stream, even when reading fails part-way through.
			try(BufferedReader reader=new BufferedReader(
					new InputStreamReader(entity.getContent(),StandardCharsets.UTF_8))){
				String line;
				while((line=reader.readLine())!=null){
					printLinks(line);
				}
			}
			
		} catch (IOException e) {
			e.printStackTrace();
		}
		
	}
	
	/**
	 * Prints the text after the last {@code /} of the header's string form:
	 * first its leading four characters (only when it is longer than four
	 * characters), then the whole text. Does nothing when the header is absent.
	 *
	 * @param header the Content-Type header, or {@code null} if the entity has none
	 */
	private static void printSubtype(Header header){
		if(header==null){
			return;
		}
		String type=header.toString();
		type=type.substring(type.lastIndexOf("/")+1);
		if(type.length()>4){
			System.out.println(type.substring(0, 4));
		}
		System.out.println(type);
	}
	
	/**
	 * Prints every {@code href="..."} value on the line that contains {@code http}.
	 *
	 * @param line one line of the page body
	 */
	private static void printLinks(String line){
		int start=line.indexOf(HREF_PREFIX);
		while(start!=-1){
			int urlStart=start+HREF_PREFIX.length();
			int urlEnd=line.indexOf('"',urlStart);
			if(urlEnd==-1){
				// Unterminated attribute: take the rest of the line.
				urlEnd=line.length();
			}
			String url=line.substring(urlStart,urlEnd);
			if(url.contains("http")){
				System.out.println(url);
			}
			// Continue after this value so later links on the line are found too.
			start=line.indexOf(HREF_PREFIX,urlEnd);
		}
	}
	
}
